mindset_plot_with_legend <-ggplot(data = data, aes(x = growth_mindset, group = school_music_elective, fill = school_music_elective)) +geom_density(adjust =1.5, alpha = .8) +scale_fill_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE")) +theme_ipsum() +labs(x ="Mindset of Music Ability", fill ="Enrolled in School Music?") +theme(legend.position ="bottom", legend.margin =margin(l =-10, unit ="pt"))
Code
mindset_plot <-ggplot(data = data, aes(x = growth_mindset, group = school_music_elective, fill = school_music_elective)) +geom_density(alpha = .8) +#removed "adjust = 1.5" to show more nuance on the density linescale_fill_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE")) +theme_ipsum() +labs(x ="Mindset of Music Ability", fill ="Enrolled in School Music?") +theme(legend.position ="none", plot.title =element_text(size =10))
Code
active_engagement_plot <-ggplot(data = data, aes(x = active_engagement, group = school_music_elective, fill = school_music_elective)) +geom_density(alpha = .8) +#removed "adjust = 1.5" to show more nuance on the density linescale_fill_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE")) +theme_ipsum() +labs(x ="Active Engagement", fill ="Enrolled in School Music?") +theme(legend.position ="none", plot.title =element_text(size =10))
# plot1 with modified legendplot1_legend <-ggplot(data, aes(x = growth_mindset, y = school_music_elective, col = school_music_elective)) +geom_point(size =4) +scale_color_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE"), name ="Enrolled in School Music?") +theme(legend.position ="bottom")# function to extract legend from plotget_only_legend <-function(plot) { plot_table <-ggplot_gtable(ggplot_build(plot)) legend_plot <-which(sapply(plot_table$grobs, function(x) x$name) =="guide-box") legend <- plot_table$grobs[[legend_plot]]return(legend)}# extracting modified legend from plot1 using the above functionlegend <-get_only_legend(plot1_legend)# final combined plot with shared modified legendgrid.arrange(combined_plot, legend, nrow =2, heights =c(10, 1))
Histogram
Code
data %>%ggplot( aes(x=msc_full)) +geom_histogram(binwidth =4, fill ="#4E2A84", color ="#B6ACD1", alpha=0.9) +ggtitle("Global Music Self-Concept") +theme_ipsum() +theme(plot.title =element_text(size=15) )
Code
#Distribution of Music Self-Concept Subscales and Mindsetdata %>%select(active_engagement_z:msc_full_z) %>%multi.hist() +theme_ipsum()
NULL
Code
#Distribution of Music Participation Variablesdata %>%select(schoolChoir:selfTaught) %>%multi.hist() +theme_ipsum()
NULL
Code
filtered_data <-subset(data, school_music_elective =="Yes")ggplot(filtered_data, aes(x = growth_mindset)) +geom_histogram(binwidth =1, fill ="#4E2A84", color ="white", alpha =0.7) +theme_minimal() +labs(title ="Mindset of Students Enrolled in Elective Music Classes",x ="Mindset Score",y ="Frequency")
radarchart(data_for_radar, axistype =1, pcol ="#4E2A84", # Color of the average valuesplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels
Gender
Code
# Set graphic colorslibrary(RColorBrewer)coul <-brewer.pal(3, "BuPu")colors_border <- coullibrary(scales)
Attaching package: 'scales'
The following objects are masked from 'package:psych':
alpha, rescale
The following object is masked from 'package:viridis':
viridis_pal
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
Code
colors_in <-alpha(coul,0.3)data_subset_gender <- data %>%select(gender, growth_mindset_z:movement_dance_msc_z) %>%filter(gender %in%c("Male", "Female")) %>%rename("Growth Mindset"= growth_mindset_z,"Movement & Dance"= movement_dance_msc_z,"Ideal Musical Self"= ideal_musical_self_msc_z,"Adaptive Musical Self"= adaptive_musical_self_msc_z,"Music Ability"= musical_ability_msc_z,"Community"= community_msc_z,"Mood Management"= mood_management_msc_z)# Group the data by 'gender' and calculate the mean for each variableaverage_by_gender <- data_subset_gender %>%group_by(gender) %>%summarise(across(everything(), mean, na.rm =TRUE))
Warning: There was 1 warning in `summarise()`.
ℹ In argument: `across(everything(), mean, na.rm = TRUE)`.
ℹ In group 1: `gender = "Female"`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
Code
# Check the average data by genderprint(average_by_gender)
# Create the max and min rows based on the entire datasetmax_values <-apply(data_subset_gender[, -which(names(data_subset_gender) =="gender")], 2, max)min_values <-apply(data_subset_gender[, -which(names(data_subset_gender) =="gender")], 2, min)# Combine the max, min, and average values for the two gender groups into a dataframedata_for_radar <-rbind(max_values, min_values, as.data.frame(average_by_gender[,-1]))# Ensure the data is a dataframe and proper column names are retaineddata_for_radar <-as.data.frame(data_for_radar)# Create a radar chart with multiple groups (one for each gender)radarchart(data_for_radar, axistype =1, pcol =c("#4E2A84", "#B6ACD1"),# Different colors for each genderplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between genderslegend("topright", legend =unique(data_subset_gender$gender), col =c("#4E2A84", "#B6ACD1"), lty =1, lwd =2)
Electives
Code
data_subset_elective <- data %>%select(currentSchoolMusic, growth_mindset_z:movement_dance_msc_z) %>%rename("Growth Mindset"= growth_mindset_z,"Movement & Dance"= movement_dance_msc_z,"Ideal Musical Self"= ideal_musical_self_msc_z,"Adaptive Musical Self"= adaptive_musical_self_msc_z,"Music Ability"= musical_ability_msc_z,"Community"= community_msc_z,"Mood Management"= mood_management_msc_z)# Group the data by 'currentSchoolMusic' and calculate the mean for each variableaverage_by_elective <- data_subset_elective %>%group_by(currentSchoolMusic) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by electiveprint(average_by_elective)
# A tibble: 4 × 8
currentSchoolMusic `Growth Mindset` `Mood Management` Community
<chr> <dbl> <dbl> <dbl>
1 Band 1.64 -0.796 -0.633
2 Choir 0.861 0.662 0.765
3 No music -0.912 -0.192 -0.188
4 Other -0.0996 1.17 0.519
# ℹ 4 more variables: `Music Ability` <dbl>, `Adaptive Musical Self` <dbl>,
# `Ideal Musical Self` <dbl>, `Movement & Dance` <dbl>
Code
# Create the max and min rows based on the entire datasetmax_values <-apply(data_subset_elective[, -which(names(data_subset_elective) =="currentSchoolMusic")], 2, max)min_values <-apply(data_subset_elective[, -which(names(data_subset_elective) =="currentSchoolMusic")], 2, min)# Combine the max, min, and average values for the gender groups into a dataframedata_for_radar <-rbind(max_values, min_values, as.data.frame(average_by_elective[,-1]))# Ensure the data is a dataframe and proper column names are retaineddata_for_radar <-as.data.frame(data_for_radar)# Create a radar chart with multiple groups (one for each elective)radarchart(data_for_radar, axistype =1, pcol =c("red", "blue", "green", "purple"),# Different colors for each electiveplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between electiveslegend("topright", legend =unique(data_subset_elective$currentSchoolMusic), col =c("red", "blue", "green", "purple"), lty =1, lwd =2)
Music Elective (Yes or No) $1
Code
data_subset_music_elective <- data %>%select(school_music_elective, growth_mindset, mood_management_msc:movement_dance_msc) %>%rename("Growth Mindset"= growth_mindset,"Movement & Dance"= movement_dance_msc,"Ideal Musical Self"= ideal_musical_self_msc,"Adaptive Musical Self"= adaptive_musical_self_msc,"Music Ability"= musical_ability_msc,"Community"= community_msc,"Mood Management"= mood_management_msc)# Group the data by 'school_music_elective' and calculate the mean for each variableaverage_by_music_elective <- data_subset_music_elective %>%group_by(school_music_elective) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by music_electiveprint(average_by_music_elective)
# Create the max and min rows based on the entire datasetmax_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, max)min_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, min)data_for_radar_music_elective <-as.data.frame(bind_rows(max_values, min_values, average_by_music_elective [,-1]))# Combine the max, min, and average values for the two music_elective groups into a dataframe#data_for_radar_music_elective <- rbind(max_values, min_values, as.data.frame(average_by_music_elective[,-1]))# Ensure the data is a dataframe and proper column names are retained#data_for_radar_music_elective <- as.data.frame(data_for_radar_music_elective)row.names(data_for_radar_music_elective) <-c("Max", "Min", unique(data_subset_music_elective$school_music_elective))# Create a radar chart with multiple groups (one for each music_elective)radarchart(data_for_radar_music_elective, axistype =1, pcol =c("#4E2A84", "#B6ACD1"),# Different colors for each electiveplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between genderslegend("bottomright", legend =rownames(data_for_radar_music_elective[-c(1,2),]), col =c("#4E2A84", "#B6ACD1"), lty =1, lwd =2, title ="School Music?")# Add titletitle(main ="Average by Enrollment in School Music")
Music Elective (Yes or No) #2
Code
data_subset_music_elective <- data %>%select(school_music_elective, growth_mindset, mood_management_msc:movement_dance_msc) %>%rename("Growth Mindset"= growth_mindset,"Movement & Dance"= movement_dance_msc,"Ideal Musical Self"= ideal_musical_self_msc,"Adaptive Musical Self"= adaptive_musical_self_msc,"Music Ability"= musical_ability_msc,"Community"= community_msc,"Mood Management"= mood_management_msc)# Group the data by 'school_music_elective' and calculate the mean for each variableaverage_by_music_elective <- data_subset_music_elective %>%group_by(school_music_elective) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by music_electiveprint(average_by_music_elective)
# Calculate max and min valuesmax_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, max)min_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, min)# Convert max and min values to data framesmax_values_df <-as.data.frame(t(max_values), stringsAsFactors =FALSE)min_values_df <-as.data.frame(t(min_values), stringsAsFactors =FALSE)# Set explicit row names for max and minrow.names(max_values_df) <-"Max"row.names(min_values_df) <-"Min"# Prepare average values without the first column (school_music_elective)average_values_df <-as.data.frame(average_by_music_elective[,-1], stringsAsFactors =FALSE)# Store the row names for the factors (Yes, No)factor_levels <-unique(data_subset_music_elective$school_music_elective)# Set row names for the average valuesrow.names(average_values_df) <- factor_levels# Combine max, min, and average values into one data framedata_for_radar_music_elective <-rbind(max_values_df, min_values_df, average_values_df)# Now reassign the correct row names explicitly to avoid any swaprow.names(data_for_radar_music_elective) <-c("Max", "Min", factor_levels)# Check the resulting data frameprint(data_for_radar_music_elective)
Growth Mindset Mood Management Community Music Ability
Max 16.00000 24.00000 16.000000 20.00000
Min 4.00000 6.00000 4.000000 5.00000
Yes 11.81992 19.27969 9.417625 10.93870
No 13.30165 19.57025 9.760331 13.50413
Adaptive Musical Self Ideal Musical Self Movement & Dance
Max 16.00000 20.00000 16.000000
Min 4.00000 5.00000 4.000000
Yes 11.70881 12.14559 9.808429
No 12.17355 13.14463 10.235537
Code
# Create a radar chart with multiple groups (one for each music_elective)radarchart(data_for_radar_music_elective, axistype =1, pcol =c("#4E2A84", "#B6ACD1"),# Different colors for each electiveplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between genderslegend("bottomright", legend =rownames(data_for_radar_music_elective[-c(1,2),]), col =c("#4E2A84", "#B6ACD1"), lty =1, lwd =2, title ="School Music?")# Add titletitle(main ="Average by Enrollment in School Music")
Bar Plots
Music Class Enrollment (total and by school)
Code
#Count and Proportion per classmusic_elective_table <- data %>%count(currentSchoolMusic)%>%mutate("%"=round(n/sum(n)*100,2))#Proportion per school per classround(prop.table(table(data$school,data$currentSchoolMusic),1)*100, 2)
Band Choir No music Other
A 26.40 31.20 18.40 24.00
B 14.81 22.22 62.14 0.82
C 15.56 12.59 64.44 7.41
`summarise()` has grouped output by 'school'. You can override using the
`.groups` argument.
# A tibble: 3 × 5
# Groups: school [3]
school Band Choir `No music` Other
<chr> <int> <int> <int> <int>
1 A 33 39 23 30
2 B 36 54 151 2
3 C 21 17 87 10
Code
ggplot(percentages, aes(fill=currentSchoolMusic, y=percentage, x=school)) +geom_bar(position="stack", stat="identity") +geom_text(aes(label =paste0(round(percentage), "%")), position =position_stack(vjust =0.5), size =3, color ="grey", fontface ="bold") +scale_fill_viridis(discrete = T) +theme_ipsum() +labs(x ="School", y ="%", fill ="School Music Electives")
Heat Map
Are the observed frequencies in elective participation by school significantly different from expected frequencies in the distribution?
# extracting adjusted residualsadjusted_residuals <-residuals(chisq_result, type ="pearson")# Converting adjusted residuals to a matrix formatadjusted_residuals_matrix <-matrix(adjusted_residuals, nrow =nrow(chisq_result$observed))# Reshaping data for ggplotlibrary(reshape2)
Attaching package: 'reshape2'
The following object is masked from 'package:tidyr':
smiths
Code
adjusted_residuals_df <-melt(adjusted_residuals_matrix)# Renaming levels in Var1 and Var2adjusted_residuals_df$Var1 <-factor(adjusted_residuals_df$Var1, levels =c("1", "2", "3"), labels =c("A", "B", "C"))adjusted_residuals_df$Var2 <-factor(adjusted_residuals_df$Var2, levels =c("1", "2", "3", "4"), labels =c("Band", "Choir", "No music", "Other music"))# Heatmap of adjusted residualsggplot(data = adjusted_residuals_df, aes(x = Var2, y = Var1, fill = value)) +geom_tile() +scale_fill_gradient2(low ="blue", mid ="white", high ="red", midpoint =0) +theme_minimal() +labs(x ="School Music Electives", y ="School", fill ="Adjusted Residuals")
Code
ggplot(data = adjusted_residuals_df, aes(x = Var2, y = Var1, fill = value)) +geom_tile() +scale_fill_viridis() +theme_minimal() +labs(x ="School Music Ellectives", y ="School", fill ="Adjusted Residuals")
Source Code
---title: "Data Viz"author: Diego Pintoformat: htmlself-contained: truetoc: truecode-fold: truecode-tools: truetheme: light: cosmoeditor_options: chunk_output_type: inline---```{r echo=FALSE}setwd("~/Desktop/R_projects/msc")rm(list = ls()) # Clear Global Environmentlibrary(ggplot2)library(tidyverse)library(hrbrthemes)library(dplyr)library(tidyr)library(viridis)library(cowplot)library(gridExtra)library(psych)library(fmsb) #radar charts``````{r echo=FALSE}data <- read_csv("data/middle_school_music_mindsets_clean_full.csv")data <- filter(data, complete.cases(data)) #only complete cases``````{r echo=FALSE}#Creating binary variable for enrollment in music electivesdata <- data %>% mutate(school_music_elective = ifelse(currentSchoolMusic == "No music", "No", "Yes"))```## Density Plots```{r}ggplot(data=data, aes(x=msc_full, group=currentSchoolMusic, fill=currentSchoolMusic)) +geom_density(adjust=1.5, alpha=.4) +theme_ipsum() +ggtitle("Enrollment by Elective Class")``````{r}ggplot(data=data, aes(x=msc_full, group=school_music_elective, fill=school_music_elective)) +geom_density(adjust=1.5, alpha=.4) +theme_ipsum() +ggtitle("Global Music Self-Concept by Elective Category")``````{r}ggplot(data=data, aes(x=musical_ability_msc, group=school_music_elective, fill=school_music_elective)) +geom_density(adjust=1.5, alpha=.4) +theme_ipsum() +ggtitle("Academic Music Self-Concept by Elective Category")``````{r}ggplot(data=data, aes(x=msc_no_ability, group=school_music_elective, fill=school_music_elective)) +geom_density(adjust=1.5, alpha=.4) +theme_ipsum() +ggtitle("Non-Academic Music Self-Concept by Elective Category")``````{r}ggplot(data=data, aes(x=msc_full, group=currentSchoolMusic, fill=currentSchoolMusic)) +geom_density(adjust=1.5) +theme_ipsum() +facet_wrap(~currentSchoolMusic) +theme(legend.position="none",panel.spacing =unit(0.1, "lines"),axis.ticks.x=element_blank() ) +ggtitle ("Enrollment by Elective Class")``````{r}mindset_plot_with_legend <-ggplot(data = data, aes(x = growth_mindset, group = school_music_elective, fill = school_music_elective)) +geom_density(adjust =1.5, alpha = .8) +scale_fill_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE")) +theme_ipsum() +labs(x ="Mindset of Music Ability", fill ="Enrolled in School Music?") +theme(legend.position ="bottom", legend.margin =margin(l =-10, unit ="pt"))``````{r}mindset_plot <-ggplot(data = data, aes(x = growth_mindset, group = school_music_elective, fill = school_music_elective)) +geom_density(alpha = .8) +#removed "adjust = 1.5" to show more nuance on the density linescale_fill_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE")) +theme_ipsum() +labs(x ="Mindset of Music Ability", fill ="Enrolled in School Music?") +theme(legend.position ="none", plot.title =element_text(size =10))``````{r}active_engagement_plot <-ggplot(data = data, aes(x = active_engagement, group = school_music_elective, fill = school_music_elective)) +geom_density(alpha = .8) +#removed "adjust = 1.5" to show more nuance on the density linescale_fill_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE")) +theme_ipsum() +labs(x ="Active Engagement", fill ="Enrolled in School Music?") +theme(legend.position ="none", plot.title =element_text(size =10))``````{r}#Combine plotscombined_plot <-grid.arrange(mindset_plot, active_engagement_plot, ncol =2)``````{r}# plot1 with modified legendplot1_legend <-ggplot(data, aes(x = growth_mindset, y = school_music_elective, col = school_music_elective)) +geom_point(size =4) +scale_color_manual(values =c("No"="#4E2A84", "Yes"="#E4E0EE"), name ="Enrolled in School Music?") +theme(legend.position ="bottom")# function to extract legend from plotget_only_legend <-function(plot) { plot_table <-ggplot_gtable(ggplot_build(plot)) legend_plot <-which(sapply(plot_table$grobs, function(x) x$name) =="guide-box") legend <- plot_table$grobs[[legend_plot]]return(legend)}# extracting modified legend from plot1 using the above functionlegend <-get_only_legend(plot1_legend)# final combined plot with shared modified legendgrid.arrange(combined_plot, legend, nrow =2, heights =c(10, 1))```## Histogram```{r}data %>%ggplot( aes(x=msc_full)) +geom_histogram(binwidth =4, fill ="#4E2A84", color ="#B6ACD1", alpha=0.9) +ggtitle("Global Music Self-Concept") +theme_ipsum() +theme(plot.title =element_text(size=15) )``````{r}#Distribution of Music Self-Concept Subscales and Mindsetdata %>%select(active_engagement_z:msc_full_z) %>%multi.hist() +theme_ipsum()``````{r}#Distribution of Music Participation Variablesdata %>%select(schoolChoir:selfTaught) %>%multi.hist() +theme_ipsum()``````{r}filtered_data <-subset(data, school_music_elective =="Yes")ggplot(filtered_data, aes(x = growth_mindset)) +geom_histogram(binwidth =1, fill ="#4E2A84", color ="white", alpha =0.7) +theme_minimal() +labs(title ="Mindset of Students Enrolled in Elective Music Classes",x ="Mindset Score",y ="Frequency")```## Box Plot```{r}data %>%ggplot( aes(x=currentSchoolMusic, y=msc_full, fill=currentSchoolMusic)) +geom_boxplot() +scale_fill_viridis(discrete =TRUE, alpha=0.6) +geom_jitter(color="black", size=0.4, alpha=0.9) +theme_ipsum() +theme(legend.position="none",plot.title =element_text(size=11) ) +ggtitle("Distribution by Elective Class") +xlab("")```## Violin Plots```{r}data %>%ggplot( aes(x=currentSchoolMusic, y=msc_full, fill=currentSchoolMusic)) +geom_violin() +scale_fill_viridis(discrete =TRUE, alpha=0.6) +geom_jitter(color="black", size=0.4, alpha=0.9) +theme_ipsum() +theme(legend.position="none",plot.title =element_text(size=11) ) +ggtitle("Global Music Self-Concept per Elective Class") +xlab("")``````{r}data %>%ggplot( aes(x=currentSchoolMusic, y=musical_ability_msc, fill=currentSchoolMusic)) +geom_violin() +scale_fill_viridis(discrete =TRUE, alpha=0.6) +geom_jitter(color="black", size=0.4, alpha=0.9) +theme_ipsum() +theme(legend.position="none",plot.title =element_text(size=11) ) +ggtitle("Academic Music Self-Concept per Elective Class") +xlab("")``````{r}data %>%ggplot( aes(x=currentSchoolMusic, y=msc_no_ability, fill=currentSchoolMusic)) +geom_violin() +scale_fill_viridis(discrete =TRUE, alpha=0.6) +geom_jitter(color="black", size=0.4, alpha=0.9) +theme_ipsum() +theme(legend.position="none",plot.title =element_text(size=11) ) +ggtitle("Non-Academic Music Self-Concept per Elective Class") +xlab("")```## Radar Charts### Full Sample```{r}data_subset <- data %>%select(growth_mindset_z:movement_dance_msc_z)average_values <-colMeans(data_subset)max_values <-apply(data_subset, 2, max)min_values <-apply(data_subset, 2, min)data_for_radar <-as.data.frame(rbind(max_values, min_values, average_values))radarchart(data_for_radar)radarchart(data_for_radar, axistype =1, pcol ="#4E2A84", # Color of the average valuesplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels```### Gender```{r}# Set graphic colorslibrary(RColorBrewer)coul <-brewer.pal(3, "BuPu")colors_border <- coullibrary(scales)colors_in <-alpha(coul,0.3)data_subset_gender <- data %>%select(gender, growth_mindset_z:movement_dance_msc_z) %>%filter(gender %in%c("Male", "Female")) %>%rename("Growth Mindset"= growth_mindset_z,"Movement & Dance"= movement_dance_msc_z,"Ideal Musical Self"= ideal_musical_self_msc_z,"Adaptive Musical Self"= adaptive_musical_self_msc_z,"Music Ability"= musical_ability_msc_z,"Community"= community_msc_z,"Mood Management"= mood_management_msc_z)# Group the data by 'gender' and calculate the mean for each variableaverage_by_gender <- data_subset_gender %>%group_by(gender) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by genderprint(average_by_gender)# Create the max and min rows based on the entire datasetmax_values <-apply(data_subset_gender[, -which(names(data_subset_gender) =="gender")], 2, max)min_values <-apply(data_subset_gender[, -which(names(data_subset_gender) =="gender")], 2, min)# Combine the max, min, and average values for the two gender groups into a dataframedata_for_radar <-rbind(max_values, min_values, as.data.frame(average_by_gender[,-1]))# Ensure the data is a dataframe and proper column names are retaineddata_for_radar <-as.data.frame(data_for_radar)# Create a radar chart with multiple groups (one for each gender)radarchart(data_for_radar, axistype =1, pcol =c("#4E2A84", "#B6ACD1"),# Different colors for each genderplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between genderslegend("topright", legend =unique(data_subset_gender$gender), col =c("#4E2A84", "#B6ACD1"), lty =1, lwd =2)```### Electives```{r}data_subset_elective <- data %>%select(currentSchoolMusic, growth_mindset_z:movement_dance_msc_z) %>%rename("Growth Mindset"= growth_mindset_z,"Movement & Dance"= movement_dance_msc_z,"Ideal Musical Self"= ideal_musical_self_msc_z,"Adaptive Musical Self"= adaptive_musical_self_msc_z,"Music Ability"= musical_ability_msc_z,"Community"= community_msc_z,"Mood Management"= mood_management_msc_z)# Group the data by 'currentSchoolMusic' and calculate the mean for each variableaverage_by_elective <- data_subset_elective %>%group_by(currentSchoolMusic) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by electiveprint(average_by_elective)# Create the max and min rows based on the entire datasetmax_values <-apply(data_subset_elective[, -which(names(data_subset_elective) =="currentSchoolMusic")], 2, max)min_values <-apply(data_subset_elective[, -which(names(data_subset_elective) =="currentSchoolMusic")], 2, min)# Combine the max, min, and average values for the gender groups into a dataframedata_for_radar <-rbind(max_values, min_values, as.data.frame(average_by_elective[,-1]))# Ensure the data is a dataframe and proper column names are retaineddata_for_radar <-as.data.frame(data_for_radar)# Create a radar chart with multiple groups (one for each elective)radarchart(data_for_radar, axistype =1, pcol =c("red", "blue", "green", "purple"),# Different colors for each electiveplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between electiveslegend("topright", legend =unique(data_subset_elective$currentSchoolMusic), col =c("red", "blue", "green", "purple"), lty =1, lwd =2)```### Music Elective (Yes or No) \$1```{r}data_subset_music_elective <- data %>%select(school_music_elective, growth_mindset, mood_management_msc:movement_dance_msc) %>%rename("Growth Mindset"= growth_mindset,"Movement & Dance"= movement_dance_msc,"Ideal Musical Self"= ideal_musical_self_msc,"Adaptive Musical Self"= adaptive_musical_self_msc,"Music Ability"= musical_ability_msc,"Community"= community_msc,"Mood Management"= mood_management_msc)# Group the data by 'school_music_elective' and calculate the mean for each variableaverage_by_music_elective <- data_subset_music_elective %>%group_by(school_music_elective) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by music_electiveprint(average_by_music_elective)# Create the max and min rows based on the entire datasetmax_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, max)min_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, min)data_for_radar_music_elective <-as.data.frame(bind_rows(max_values, min_values, average_by_music_elective [,-1]))# Combine the max, min, and average values for the two music_elective groups into a dataframe#data_for_radar_music_elective <- rbind(max_values, min_values, as.data.frame(average_by_music_elective[,-1]))# Ensure the data is a dataframe and proper column names are retained#data_for_radar_music_elective <- as.data.frame(data_for_radar_music_elective)row.names(data_for_radar_music_elective) <-c("Max", "Min", unique(data_subset_music_elective$school_music_elective))# Create a radar chart with multiple groups (one for each music_elective)radarchart(data_for_radar_music_elective, axistype =1, pcol =c("#4E2A84", "#B6ACD1"),# Different colors for each electiveplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between genderslegend("bottomright", legend =rownames(data_for_radar_music_elective[-c(1,2),]), col =c("#4E2A84", "#B6ACD1"), lty =1, lwd =2, title ="School Music?")# Add titletitle(main ="Average by Enrollment in School Music")```### Music Elective (Yes or No) #2```{r}data_subset_music_elective <- data %>%select(school_music_elective, growth_mindset, mood_management_msc:movement_dance_msc) %>%rename("Growth Mindset"= growth_mindset,"Movement & Dance"= movement_dance_msc,"Ideal Musical Self"= ideal_musical_self_msc,"Adaptive Musical Self"= adaptive_musical_self_msc,"Music Ability"= musical_ability_msc,"Community"= community_msc,"Mood Management"= mood_management_msc)# Group the data by 'school_music_elective' and calculate the mean for each variableaverage_by_music_elective <- data_subset_music_elective %>%group_by(school_music_elective) %>%summarise(across(everything(), mean, na.rm =TRUE))# Check the average data by music_electiveprint(average_by_music_elective)# Calculate max and min valuesmax_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, max)min_values <-apply(data_subset_music_elective[, -which(names(data_subset_music_elective) =="school_music_elective")], 2, min)# Convert max and min values to data framesmax_values_df <-as.data.frame(t(max_values), stringsAsFactors =FALSE)min_values_df <-as.data.frame(t(min_values), stringsAsFactors =FALSE)# Set explicit row names for max and minrow.names(max_values_df) <-"Max"row.names(min_values_df) <-"Min"# Prepare average values without the first column (school_music_elective)average_values_df <-as.data.frame(average_by_music_elective[,-1], stringsAsFactors =FALSE)# Store the row names for the factors (Yes, No)factor_levels <-unique(data_subset_music_elective$school_music_elective)# Set row names for the average valuesrow.names(average_values_df) <- factor_levels# Combine max, min, and average values into one data framedata_for_radar_music_elective <-rbind(max_values_df, min_values_df, average_values_df)# Now reassign the correct row names explicitly to avoid any swaprow.names(data_for_radar_music_elective) <-c("Max", "Min", factor_levels)# Check the resulting data frameprint(data_for_radar_music_elective)# Create a radar chart with multiple groups (one for each music_elective)radarchart(data_for_radar_music_elective, axistype =1, pcol =c("#4E2A84", "#B6ACD1"),# Different colors for each electiveplwd =2, # Line widthplty =1, # Line typecglcol ="grey", # Color of the grid linescglty =1, # Type of the grid linescglwd =0.8, # Width of the grid linesaxislabcol ="grey", # Color of the axis labelsvlcex =0.8) # Control the size of variable labels# Add a legend to distinguish between genderslegend("bottomright", legend =rownames(data_for_radar_music_elective[-c(1,2),]), col =c("#4E2A84", "#B6ACD1"), lty =1, lwd =2, title ="School Music?")# Add titletitle(main ="Average by Enrollment in School Music")```### Bar Plots### Music Class Enrollment (total and by school)```{r}#Count and Proportion per classmusic_elective_table <- data %>%count(currentSchoolMusic)%>%mutate("%"=round(n/sum(n)*100,2))#Proportion per school per classround(prop.table(table(data$school,data$currentSchoolMusic),1)*100, 2)#Percentagespercentages <- data %>%group_by(school, currentSchoolMusic) %>%summarise(count =n()) %>%group_by(school) %>%mutate(percentage = count/sum(count)*100)#Frequency data %>%group_by(school, currentSchoolMusic) %>%summarise(n =n()) %>%pivot_wider(names_from = currentSchoolMusic,values_from = n)ggplot(percentages, aes(fill=currentSchoolMusic, y=percentage, x=school)) +geom_bar(position="stack", stat="identity") +geom_text(aes(label =paste0(round(percentage), "%")), position =position_stack(vjust =0.5), size =3, color ="grey", fontface ="bold") +scale_fill_viridis(discrete = T) +theme_ipsum() +labs(x ="School", y ="%", fill ="School Music Electives")```## Heat Map### Are the observed frequencies in elective participation by school significantly different from expected frequencies in the distribution?```{r}# Perform Chi-square testchisq_result <-chisq.test(table(data$school, data$currentSchoolMusic))chisq_result# extracting adjusted residualsadjusted_residuals <-residuals(chisq_result, type ="pearson")# Converting adjusted residuals to a matrix formatadjusted_residuals_matrix <-matrix(adjusted_residuals, nrow =nrow(chisq_result$observed))# Reshaping data for ggplotlibrary(reshape2)adjusted_residuals_df <-melt(adjusted_residuals_matrix)# Renaming levels in Var1 and Var2adjusted_residuals_df$Var1 <-factor(adjusted_residuals_df$Var1, levels =c("1", "2", "3"), labels =c("A", "B", "C"))adjusted_residuals_df$Var2 <-factor(adjusted_residuals_df$Var2, levels =c("1", "2", "3", "4"), labels =c("Band", "Choir", "No music", "Other music"))# Heatmap of adjusted residualsggplot(data = adjusted_residuals_df, aes(x = Var2, y = Var1, fill = value)) +geom_tile() +scale_fill_gradient2(low ="blue", mid ="white", high ="red", midpoint =0) +theme_minimal() +labs(x ="School Music Electives", y ="School", fill ="Adjusted Residuals")ggplot(data = adjusted_residuals_df, aes(x = Var2, y = Var1, fill = value)) +geom_tile() +scale_fill_viridis() +theme_minimal() +labs(x ="School Music Ellectives", y ="School", fill ="Adjusted Residuals")```